# SAMPLE NAME
## specify sample name
sample.name <- c("beau", "ophio_cflo", "ophio_kim")
## specify names for all ocflo samples
sampleName <- c("ophio_cflo","ophio_ophio-infected")
## color scheme for the samples
col.scheme <- c("#5A829F", "#AD212F", "black", "#5C2849")
# SCRIPT NAME
## specify the name of the script (folder) where figures will be saved
script.name <- "01_comparing_gene_exp_ophio_beau"
# eJTK OUTPUT
## Set GammaP threshold below which genes are classified as rhythmic
gamma.pval = 0.05
## Set false discovery rate for functional enrichment analyses
FDR = 5
# LOAD DATABASES (TC7)
# 1. TC6_ejtk.db
# Desc: This database contains all ejtk-output for TC6
ejtk.db <- dbConnect(RSQLite::SQLite(), paste0(path_to_repo,"/data/databases/TC6_fungal_ejtk.db"))
# which tables are in the database
src_dbi(ejtk.db)
## src: sqlite 3.29.0 [/Users/biplabendudas/Documents/GitHub/Das_et_al_2022a/data/databases/TC6_fungal_ejtk.db]
## tbls: beau_rhythmic_genes_12h, beau_rhythmic_genes_24h, beau_zscores_08h,
## beau_zscores_12h, beau_zscores_24h, ophio_cflo_rhythmic_genes_12h,
## ophio_cflo_rhythmic_genes_24h, ophio_cflo_zscores_08h,
## ophio_cflo_zscores_12h, ophio_cflo_zscores_24h, ophio_kim_DD_zscores_24h,
## ophio_kim_LD_rhythmic_genes_24h, ophio_kim_LD_zscores_24h
#
# 2. TC6_data.db
data.db <- dbConnect(RSQLite::SQLite(), paste0(path_to_repo,"/data/databases/TC6_fungal_data.db"))
src_dbi(data.db)
## src: sqlite 3.29.0 [/Users/biplabendudas/Documents/GitHub/Das_et_al_2022a/data/databases/TC6_fungal_data.db]
## tbls: beau_expressed_genes, beau_fpkm, beau_log2fpkm, beau_zscores,
## ophio_cflo_expressed_genes, ophio_cflo_fpkm, ophio_cflo_log2fpkm,
## ophio_cflo_zscores, ophio_kim_DD_expressed_genes, ophio_kim_DD_fpkm,
## ophio_kim_DD_log2fpkm, ophio_kim_DD_zscores, ophio_kim_expressed_genes,
## ophio_kim_fpkm, ophio_kim_log2fpkm, ophio_kim_zscores
#
# number of all genes
all.genes <- list()
for (i in 1:length(sample.name)) {
all.genes[[i]] <- tbl(data.db, paste0(sample.name[[i]] ,"_fpkm")) %>%
collect()
writeLines(paste("Number of genes in", sample.name[[i]], ":", nrow(all.genes[[i]])))
}
## Number of genes in beau : 10364
## Number of genes in ophio_cflo : 7455
## Number of genes in ophio_kim : 8577
# A1: genes that have NO expression (FPKM == 0 at all time points)
not.expressed <- list()
for (i in 1:length(sample.name)) {
not.expressed[[i]] <-
tbl(data.db, paste0(sample.name[[i]] ,"_fpkm")) %>%
collect() %>%
filter_at(vars(starts_with("Z")), all_vars(. == 0)) %>%
pull(gene_name)
# How many genes are not expressed?
writeLines(paste("n(genes-NOT-EXPRESSED) in", sample.name[[i]], ":", length(not.expressed[[i]])))
}
## n(genes-NOT-EXPRESSED) in beau : 759
## n(genes-NOT-EXPRESSED) in ophio_cflo : 190
## n(genes-NOT-EXPRESSED) in ophio_kim : 111
# A2: run enrichment (make plot of enrichment found of non-expressed genes)
for (i in 1:length(sample.name)) {
writeLines(paste("running GO enrichment for NOT-EXPRESSED genes in", sample.name[[i]]))
# run enrichment
not.expressed[[i]] %>%
check_enrichment(.,
org = sample.name[[i]],
what = "pfams",
bg = 'all',
expand = T) %>%
# which enterotoxin genes are not expressed in culture
# filter(annot_desc=="Enterotoxin_a") %>%
# # pull gene names for a given GO term
# separate_rows(., gene_name, sep = ", ") %>%
# filter(GO == "GO:0009405") %>% # pathogenesis
# # filter(GO == "GO:0090729") %>% # toxin activity
# # filter(GO == "GO:0044419") %>% # interspecies interaction between organisms
# # filter(GO == "GO:0020037") %>% # heme binding
# pull()
as.data.frame() %>%
DT::datatable(.)
# print()
}
## running GO enrichment for NOT-EXPRESSED genes in beau
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## running GO enrichment for NOT-EXPRESSED genes in ophio_cflo
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## running GO enrichment for NOT-EXPRESSED genes in ophio_kim
## [1] "Loading annotation file for Ophiocordyceps kimflemingae"
## [1] "Done."
# B: genes that are expressed (FPKM > 1 for at least one time point)
expressed <- list()
for (i in 1:length(sample.name)) {
expressed[[i]] <-
tbl(data.db, paste0(sample.name[[i]],"_expressed_genes")) %>%
filter(expressed=="yes") %>%
collect() %>%
pull(gene_name)
# How many genes are expressed?
writeLines(paste("n(EXPRESSED) in", sample.name[[i]], ":", length(expressed[[i]])))
}
## n(EXPRESSED) in beau : 9006
## n(EXPRESSED) in ophio_cflo : 6998
## n(EXPRESSED) in ophio_kim : 8150
## Load all the rhythmic genesets
## Note, ordered according to their p-value; highly rhythmic at the top.
#
# Choose period
period = '24'
##
rhy <- list()
for (i in 1:2) {
rhy[[i]] <-
tbl(ejtk.db, paste0(sample.name[[i]],"_zscores_",period,'h')) %>%
filter(GammaP < gamma.pval) %>%
select(ID, GammaP) %>% collect() %>% arrange(GammaP) %>%
select(ID) %>% pull()
# How many genes are rythmic?
writeLines(paste0("n(rhythmic-",period, "h) in ", sample.name[[i]], " : ", length(rhy[[i]])))
}
## n(rhythmic-24h) in beau : 1872
## n(rhythmic-24h) in ophio_cflo : 2285
## initialise lists to hold input and output of the hierarchical clustering
zscore.dat <- list() # zscore data (input)
my_gene_col <- list() # cluster identity for each rhythmic gene (output)
rhy.heat <- list() # pheatmap that can be saved/plotted (output)
# specify number of clusters
n_clusters <- 4
## run clustering and plot
for (i in 1:2) {
## load zscore dataset
zscore.dat[[i]] <- data.db %>% tbl(., paste0(sample.name[[i]],"_zscores")) %>% collect()
# Filter the zscores to keep only rhythmic genes
zscore.rhy <-
zscore.dat[[i]] %>%
filter(gene_name %in% rhy[[i]]) %>%
as.data.frame()
# Set genes as rownames and convert it into a matrix
rownames(zscore.rhy) = zscore.rhy$gene_name
zscore.rhy <- as.matrix(zscore.rhy[-1])
# Hierarchical clustering of the genesets
my_hclust_gene <- hclust(dist(zscore.rhy), method = "complete")
# Make annotations for the heatmaps
my_clusters <- cutree(tree = as.dendrogram(my_hclust_gene), k = n_clusters) # k= clusters
my_gene_col[[i]] <- data.frame(cluster = my_clusters)
# I’ll add some column annotations and create the heatmap.
# Annotations for:
# 1. Is the sample collected during the light or dark phase?
my_sample_col <- data.frame(phase = rep(c("light", "dark", "light"), c(5,6,1)))
row.names(my_sample_col) <- colnames(zscore.rhy)
# Manual color palette
my_colour = list(
phase = c(light = "#F2E18D", dark = "#010440"),
cluster = viridis::cividis(100)[c(seq(10,80,by=round(80/(n_clusters), 0)))])
# Color scale
my.breaks = seq(min(zscore.rhy), max(zscore.rhy), by=0.1)
# my.breaks = seq(min(zscore.rhy), max(zscore.rhy), by=0.06)
# Let's plot!
rhy.heat[[i]] <-
pheatmap(zscore.rhy, show_rownames = F, show_colnames = F,
annotation_row = my_gene_col[[i]],
annotation_col = my_sample_col,
cutree_rows = n_clusters, # OG was 4
# cutree_cols = 2,
annotation_colors = my_colour,
border_color=FALSE,
cluster_cols = F,
breaks = my.breaks,
## color scheme borrowed from:
color = inferno(length(my.breaks) - 1),
treeheight_row = 0,
# treeheight_col = 0,
# remove the color scale or not
main = paste0(sample.name[[i]], " 24h-rhythmic \n (n=", nrow(zscore.rhy), " genes)"),
## annotation legend
annotation_legend = T,
## Color scale
legend = T)
}
rhy.24.sig <- list()
phase.ejtk <- list()
# Obtain the phases of 24h-rhythmic genes beau v. ophio_cflo
for (i in 1:2) {
rhy.24.sig[[i]] <-
tbl(ejtk.db, paste0(sample.name[i],"_zscores_24h")) %>%
filter(GammaP < gamma.pval) %>%
collect()
# Get the phases of the best matched waveforms
phase.ejtk[[i]] <- circular::circular(rhy.24.sig[[i]]$Phase, units="hours", template="clock24")
# # Get the time-of-day of expression peak
# phase.ejtk[[i]] <- circular::circular(rhy.24.sig[[i]]$MaxLoc, units="hours", template="clock24")
# # Get the time-of-day of expression trough
# phase.ejtk[[i]] <- circular::circular(rhy.24.sig[[i]]$MinLoc, units="hours", template="clock24")
}
# save all the circular phases in a list
l.phases <- phase.ejtk
# let's name the list elements for later use and reference
names(l.phases) <- sample.name[1:2]
writeLines("Performing Watson test to check if the average peak of 24h-rhythms in Beau and Ophio-cflo differs significantly")
## Performing Watson test to check if the average peak of 24h-rhythms in Beau and Ophio-cflo differs significantly
# For all rhy genes
beau.ophio <- watson.two.test(l.phases[[1]],l.phases[[2]], alpha = FDR/100)
writeLines("Beau v. Ophio-cflo")
## Beau v. Ophio-cflo
beau.ophio %>% print()
##
## Watson's Two-Sample Test of Homogeneity
##
## Test Statistic: 5.9634
## Level 0.05 Critical Value: 0.187
## Reject Null Hypothesis
## Plot the phase distributions
# Initialize a list for saving the ggplots
g <- list()
means <- as.numeric(lapply(phase.ejtk, mean))
means <- circular(means, units="hours", template="clock24")
for(i in 1:length(l.phases)) {
# define phase levels
ordered_phases <- c("2","4","6","8","10","12",
"14","16","18","20","22","24")
df.test <- l.phases[[i]] %>%
as.data.frame() %>%
mutate(phase = x) %>%
mutate(phase = replace(phase, x=="0", "24")) %>%
select(-x) %>%
group_by(phase = factor(phase, levels = ordered_phases)) %>%
summarise(n_genes = n())
m <- as.numeric(means[i])
g[[i]] <-
ggplot(df.test, aes(x=factor(phase), y=n_genes)) +
# indicate light-dark phase
geom_rect(aes(xmin = as.factor(12), xmax = as.factor(24), ymin = -Inf, ymax = Inf),
fill = "grey80", alpha = 0.05, color=NA) +
geom_bar(stat='identity', fill=col.scheme[[i]]) +
xlab(c(names(l.phases)[i])) +
scale_y_continuous(breaks = c(0,300,600), limits = c(0,700)) +
coord_polar() +
theme_Publication(base_size = 20) +
theme(# axis.title.x=element_blank(),
# axis.text.x=element_blank(),
legend.position = "none")
#ggtitle(paste0("Dataset: ", names(l.phases)[i]))
}
ggpubr::ggarrange(plotlist=g,
nrow = 1, ncol = 2,
widths = c(1,1), labels = NA)
for (i in 1:n_clusters){
writeLines(paste0("Species: ", sample.name[[1]], "\n", "24h-rhythmic genes, Cluster: ", i))
# Summary
genes <- my_gene_col[[1]] %>% rownames_to_column("g") %>% filter(cluster==as.character(i)) %>% pull(g)
writeLines(paste0("n(genes) = ", length(genes),"\n"))
# Enrichment
overrepresented.terms <-
genes %>%
check_enrichment(.,
function.dir = path_to_repo,
what = "pfams",
org = sample.name[[1]],
bg = expressed[[1]],
filter = T,
plot = F)
writeLines(paste0("\n", "n(overrepresented terms) = ", nrow(overrepresented.terms), "\n"))
print(overrepresented.terms %>% as_tibble())
# Stacked zplot
genes %>%
stacked.zplot_tc6(cond = "beau", plot.mean = F) %>%
multi.plot(rows = 1, cols = 1)
}
## Species: beau
## 24h-rhythmic genes, Cluster: 1
## n(genes) = 767
##
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 28
##
## # A tibble: 28 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF03810 IBN_N 0 0.009 0.001 8 BBA_00780, BBA…
## 2 PF07714 Pkinase_Tyr 0 0.036 0.012 105 BBA_00171, BBA…
## 3 PF00069 Pkinase 0 0.037 0.014 124 BBA_00171, BBA…
## 4 PF02535 Zip 0.00008 0.007 0.001 8 BBA_01230, BBA…
## 5 PF00689 Cation_ATP… 0.00012 0.008 0.001 12 BBA_00548, BBA…
## 6 PF00512 HisKA 0.000130 0.007 0.001 9 BBA_00328, BBA…
## 7 PF05729 NACHT 0.000130 0.009 0.002 17 BBA_01725, BBA…
## 8 PF00072 Response_r… 0.000130 0.008 0.001 13 BBA_00328, BBA…
## 9 PF00690 Cation_ATP… 0.000130 0.008 0.001 13 BBA_00548, BBA…
## 10 PF04082 Fungal_tra… 0.00017 0.032 0.014 125 BBA_00237, BBA…
## # … with 18 more rows
## Species: beau
## 24h-rhythmic genes, Cluster: 2
## n(genes) = 550
##
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 2
##
## # A tibble: 2 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 "PF00226" DnaJ 0.00041 0.013 0.003 23 BBA_00135, BBA_0…
## 2 "" no_desc 0.00199 0.819 0.757 6815 BBA_00007, BBA_0…
## Species: beau
## 24h-rhythmic genes, Cluster: 3
## n(genes) = 337
##
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 2
##
## # A tibble: 2 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF08659 KR 0.0170 0.021 0.007 60 BBA_01241, BBA_…
## 2 PF13561 adh_short_… 0.0198 0.021 0.008 68 BBA_00446, BBA_…
## Species: beau
## 24h-rhythmic genes, Cluster: 4
## n(genes) = 218
##
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 2
##
## # A tibble: 2 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 "PF00076" RRM_1 0.00656 0.023 0.006 55 BBA_00301, BBA_0…
## 2 "" no_desc 0.00943 0.832 0.757 6815 BBA_00129, BBA_0…
for (i in 1:n_clusters){
writeLines(paste0("Species: ", sample.name[[2]], "\n", "24h-rhythmic genes, Cluster: ", i))
# Summary
genes <- my_gene_col[[2]] %>% rownames_to_column("g") %>% filter(cluster==as.character(i)) %>% pull(g)
writeLines(paste0("n(genes) = ", length(genes),"\n"))
# Enrichment
overrepresented.terms <-
genes %>%
check_enrichment(.,
function.dir = path_to_repo,
what = "pfams",
org = sample.name[[2]],
bg = expressed[[2]],
filter = T,
plot = F)
writeLines(paste0("\n", "n(overrepresented terms) = ", nrow(overrepresented.terms), "\n"))
print(overrepresented.terms %>% as_tibble())
# Stacked zplot
stacked.plot1 <- genes %>% stacked.zplot_tc6(cond = sampleName[[1]]) %>% pluck(1)
stacked.plot2 <- genes %>% stacked.zplot_tc6(cond = sampleName[[2]]) %>% pluck(1)
ggpubr::ggarrange(plotlist=list(stacked.plot1, stacked.plot2),
nrow = 1, ncol = 2,
widths = c(1,1), labels = NA) %>%
print()
}
## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 1
## n(genes) = 833
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 25
##
## # A tibble: 25 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF00512 HisKA 0.00003 0.01 0.002 7 GQ602_001137, …
## 2 PF00072 Response_r… 0.00008 0.012 0.002 10 GQ602_001137, …
## 3 PF00632 HECT 0.00008 0.009 0.001 6 GQ602_000948, …
## 4 PF00664 ABC_membra… 0.00038 0.014 0.003 15 GQ602_000447, …
## 5 PF14604 SH3_9 0.00066 0.012 0.003 13 GQ602_000765, …
## 6 PF07653 SH3_2 0.00259 0.01 0.003 12 GQ602_000765, …
## 7 PF00018 SH3_1 0.00337 0.012 0.004 16 GQ602_000765, …
## 8 PF02518 HATPase_c 0.00337 0.01 0.003 13 GQ602_001137, …
## 9 PF02801 Ketoacyl-s… 0.00337 0.01 0.003 13 GQ602_002162, …
## 10 PF00109 ketoacyl-s… 0.00498 0.01 0.003 14 GQ602_002162, …
## # … with 15 more rows
## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 2
## n(genes) = 465
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 2
##
## # A tibble: 2 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF04082 Fungal_tra… 0.00094 0.039 0.013 54 GQ602_000498, G…
## 2 PF00172 Zn_clus 0.0468 0.036 0.017 75 GQ602_000498, G…
## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 3
## n(genes) = 354
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 0
##
## # A tibble: 0 x 7
## # … with 7 variables: annot_term <chr>, annot_desc <chr>, adj_pVal <dbl>,
## # sam_freq <dbl>, back_freq <dbl>, n_annot_bg <int>, gene_name <chr>
## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 4
## n(genes) = 633
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 4
##
## # A tibble: 4 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF00118 Cpn60_TCP1 0 0.024 0.002 10 GQ602_000364, GQ…
## 2 PF00076 RRM_1 0 0.058 0.014 60 GQ602_000255, GQ…
## 3 PF00004 AAA 0.00033 0.034 0.01 45 GQ602_000605, GQ…
## 4 PF00400 WD40 0.0168 0.041 0.021 91 GQ602_000228, GQ…
reducing redundant GO terms
Note to self: If we use GO terms instead of pfams, we will need to reduce the redundant GO annotations that is present in fungal annotation data.
Two options to do this:
Option #1: Get the list of overrepresented GO terms and their associated p-values and use REVIGO portal online to reduce the redundant terms
Option #2: Use the scripts provided by REVIGO to programmatically run REVIGO using bash/R. For more information see (here)[http://revigo.irb.hr/FAQ.aspx#q07] Status: tried running it via bash, and it didn’t work; NEED TO FIGURE IT OUT.
Next, we compare the homologous genes in both the fungi to understand if the rhythmic genes (and processes) in the two fungi are similar or not; also, is there any differences in the daily expression of these genes between the two fungal parasites?
# Read the source file
homology.file <- "ophio_beau_homology.csv"
homology.file <-
paste0(path_to_repo, "/results/proteinortho/", homology.file) %>%
read.csv(., stringsAsFactors = F, na.strings = c(" ","","NA"))
# Clean the source file to keep distinct gene-gene homologs
homology.dat <-
homology.file %>%
# names() %>%
select(ophio_gene, beau_gene) %>%
na.omit() %>%
distinct() %>%
group_by(beau_gene) %>%
filter(n()==1) %>%
select(beau_gene, ophio_gene)
writeLines(paste("Of the", length(expressed[[2]]), "genes expressed in Ophio-cflo,",
"and", length(expressed[[1]]), "genes expressed in Beau,\n",
nrow(homology.dat), "genes show one-to-one orthology"))
## Of the 6998 genes expressed in Ophio-cflo, and 9006 genes expressed in Beau,
## 5274 genes show one-to-one orthology
for (i in 1:2){
# exp.dat <- expressed[[i]]
rhy.dat <- rhy[[i]]
ortho.dat <- homology.dat %>% pull(i)
listInput <- list(rhy.dat, ortho.dat)
names(listInput) <- c(paste0(sample.name[[i]], c("_rhy24","_ortho")))
library(UpSetR)
library(viridis)
# caste.col <- c("#F23030","#1A80D9")
upset(fromList(listInput),
number.angles = 0, point.size = 3, line.size = 1.5,
mainbar.y.label = "Number of overlapping genes",
sets.x.label = "Sig. rhy genes",
text.scale = c(1.5, # y-axis label ("# overlapping genes")
2, # y-axis tick labels ("1000, 2000,..")
1.5, # label for histogram ("sig. rhy genes")
1, # tick labels for histogram
1.5, # set names ("Cflo-brain_08h,..")
1.5),
sets = names(listInput),
nintersects = 15,
keep.order = T,
sets.bar.color = viridis(1),
# adding queries
query.legend = "bottom"
) %>%
print()
writeLines(paste0(
"The above plot shows that of the ", length(rhy.dat), " rhythmic genes in ", sample.name[[i]],
", how many has an 1:1 ortholog in the other fungal species. \n",
"Note, orthologous genes were identified using proteinortho "))
}
## The above plot shows that of the 1872 rhythmic genes in beau, how many has an 1:1 ortholog in the other fungal species.
## Note, orthologous genes were identified using proteinortho
## The above plot shows that of the 2285 rhythmic genes in ophio_cflo, how many has an 1:1 ortholog in the other fungal species.
## Note, orthologous genes were identified using proteinortho
First, let’s explore the genes that are 24h-rhythmic in a fungal species but lack a one-to-one ortholog in the other species.
rhy.not.homolog <- list()
for(i in 1:2){
# save the names of genes
rhy.not.homolog[[i]] <- setdiff(rhy[[i]], homology.dat[[i]])
# run enrichment
rhy.not.homolog[[i]] %>%
check_enrichment(.,
org=sample.name[[i]],
expand = T,
what = "pfams") %>%
# filter(annot_desc=="Enterotoxin_a") %>%
# pull(gene_name) %>%
# stacked.zplot_tc6(cond="ophio",plot.mean = F, bg.alpha = 0.5)
print()
}
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 74 x 3
## # Groups: gene_name [74]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 BBA_00071 not_available Zn_clus
## 2 BBA_00237 not_available Zn_clus; Fungal_trans
## 3 BBA_00346 not_available DAO
## 4 BBA_00450 not_available Fungal_trans_2
## 5 BBA_00452 not_available DAO
## 6 BBA_00545 not_available Beta-lactamase
## 7 BBA_00962 not_available FAD_binding_3
## 8 BBA_01641 not_available Fungal_trans_2; Zn_clus
## 9 BBA_01701 not_available Fungal_trans_2
## 10 BBA_01725 not_available NACHT
## # … with 64 more rows
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 262 x 3
## # Groups: gene_name [262]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 GQ602_000017 hypothetical protein CDD80_2801 no_desc
## 2 GQ602_000039 ---NA--- no_desc
## 3 GQ602_000042 retinol dehydrogenase protein no_desc
## 4 GQ602_000134 F-box domain containing protein no_desc
## 5 GQ602_000146 Zinc finger, C2H2-type/integrase, DNA-binding protein no_desc
## 6 GQ602_000153 hypothetical protein CDD80_7375 no_desc
## 7 GQ602_000179 predicted protein no_desc
## 8 GQ602_000197 F-box-like domain-containing protein no_desc
## 9 GQ602_000200 ---NA--- no_desc
## 10 GQ602_000216 hypothetical protein CDD80_6240 no_desc
## # … with 252 more rows
Next, I wanted to see if certain gene clusters show a characteristic difference in their daily expression in a species-specific manner (meaning, same genes but different daily expression in the two fungal species?).
To do so, I performed hierarchical clustering of all orthologous genes that show sig. 24h rhythms in EITHER Ocflo OR Beau to identify clusters of genes that show a synchronized expression in each of the two fungal species.
rhy.homology.dat <-
homology.dat %>%
filter(beau_gene %in% rhy[[1]] | ophio_gene %in% rhy[[2]])
### Make the dataframe for plotting
zscore.rhy.homology.dat <-
zscore.dat[[1]] %>%
filter(gene_name %in% rhy.homology.dat[[1]]) %>%
rename_at(vars(starts_with("ZT")), ~ (gsub("A", "B", .x, fixed = TRUE))) %>% # fix colnames for beau
# add ophio homologs for the beau genes
left_join(rhy.homology.dat, by=c("gene_name" = "beau_gene")) %>%
# remove the beau names and keep the ophio names only
select(-1) %>%
select(gene_name = ophio_gene, everything()) %>%
# join ophio-cflo data
left_join(zscore.dat[[2]], by="gene_name") %>%
# drop any genes without expression values (NA)
na.omit() %>%
as.data.frame() %>%
# set genes as rownames
column_to_rownames("gene_name")
# Set genes as rownames and convert it into a matrix
# rownames(zscore.rhy.homology.dat) = zscore.rhy.homology.dat$gene_name
zscore.rhy.homology.dat <- as.matrix(zscore.rhy.homology.dat)
# Hierarchical clustering of the genesets
my_hclust_gene <- hclust(dist(zscore.rhy.homology.dat), method = "complete")
# Make annotations for the heatmaps
n_clusters <- 4
my_clusters <- cutree(tree = as.dendrogram(my_hclust_gene), k = n_clusters) # k= clusters
my_gene_col <- data.frame(cluster = my_clusters)
# I’ll add some column annotations and create the heatmap.
# Annotations for:
# 1. Is the sample collected during the light or dark phase?
my_sample_col <- data.frame(phase = rep(rep(c("light", "dark", "light"),c(5,6,1)),2),
conds = rep(c("beau", "ophio_cflo"), each=12))
row.names(my_sample_col) <- colnames(zscore.rhy.homology.dat)
# Manual color palette
my_colour = list(
phase = c(light = "#F2E18D", dark = "#010440"),
conds = c(beau = "#5A829F", ophio_cflo = "#AD212F"),
cluster = viridis::cividis(100)[c(seq(10,80,by=round(80/(n_clusters), 0)))])
# Color scale
my.breaks = seq(min(zscore.rhy.homology.dat), max(zscore.rhy.homology.dat), by=0.1)
# my.breaks = seq(min(zscore.rhy), max(zscore.rhy), by=0.06)
# Let's plot!
pheatmap(zscore.rhy.homology.dat, show_rownames = F, show_colnames = F,
annotation_row = my_gene_col,
annotation_col = my_sample_col,
cutree_rows = n_clusters, # OG was 4
# cutree_cols = 4,
gaps_col = c(12,24),
annotation_colors = my_colour,
border_color=FALSE,
cluster_cols = F,
breaks = my.breaks,
## color scheme borrowed from:
color = inferno(length(my.breaks) - 1),
treeheight_row = 0,
# treeheight_col = 0,
# remove the color scale or not
main = paste0("24h-rhythmic in Ocflo or Beau \n (n=",
nrow(zscore.rhy.homology.dat), " orthologous genes)"),
## annotation legend
annotation_legend = T,
## Color scale
legend = T)
sampleName <- c("ophio_cflo","ophio_ophio-infected")
for (j in 1:2) {
for (i in 1:n_clusters){
writeLines(paste0("Species: ", sample.name[[j]], "\n", "24h-rhythmic genes, Cluster: ", i))
# Summary
genes <- my_gene_col %>% rownames_to_column("g") %>% filter(cluster==as.character(i)) %>% pull(g)
writeLines(paste0("n(genes) = ", length(genes),"\n"))
# define the background geneset for enrichment analysis
bg.genes <- homology.dat %>% pull(ophio_gene) %>% unique()
## Transform gene names (ophio -> beau) and refine background geneset
if (j == 1) {
genes <-
homology.dat %>%
filter(ophio_gene %in% genes) %>%
pull(beau_gene)
bg.genes <- homology.dat %>% pull(beau_gene) %>% unique()
}
# Enrichment
# overrepresented.terms <-
genes %>%
check_enrichment(.,
what = "pfams",
org = sample.name[[j]],
bg = expressed[[j]],
filter = T,
expand = F,
plot = T) %>% as.data.frame() %>% select(1:2) %>% head(15)
print(overrepresented.terms)
writeLines(paste0("\n", "n(overrepresented terms) = ", nrow(overrepresented.terms), "\n"))
# Stacked zplot
if (j==2) {
# Stacked zplot
stacked.plot1 <- genes %>% stacked.zplot_tc6(cond = sampleName[[1]]) %>% pluck(1)
stacked.plot2 <- genes %>% stacked.zplot_tc6(cond = sampleName[[2]]) %>% pluck(1)
ggpubr::ggarrange(plotlist=list(stacked.plot1, stacked.plot2),
nrow = 1, ncol = 2,
widths = c(1,1), labels = NA) %>%
print()
} else {
genes %>%
stacked.zplot_tc6(cond = sample.name[[j]]) %>%
multi.plot(rows = 1, cols = 1)
}
}
}
## Species: beau
## 24h-rhythmic genes, Cluster: 1
## n(genes) = 718
##
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 4 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF00118 Cpn60_TCP1 0 0.024 0.002 10 GQ602_000364, GQ…
## 2 PF00076 RRM_1 0 0.058 0.014 60 GQ602_000255, GQ…
## 3 PF00004 AAA 0.00033 0.034 0.01 45 GQ602_000605, GQ…
## 4 PF00400 WD40 0.0168 0.041 0.021 91 GQ602_000228, GQ…
##
## n(overrepresented terms) = 4
## Species: beau
## 24h-rhythmic genes, Cluster: 2
## n(genes) = 1026
##
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 4 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF00118 Cpn60_TCP1 0 0.024 0.002 10 GQ602_000364, GQ…
## 2 PF00076 RRM_1 0 0.058 0.014 60 GQ602_000255, GQ…
## 3 PF00004 AAA 0.00033 0.034 0.01 45 GQ602_000605, GQ…
## 4 PF00400 WD40 0.0168 0.041 0.021 91 GQ602_000228, GQ…
##
## n(overrepresented terms) = 4
## Species: beau
## 24h-rhythmic genes, Cluster: 3
## n(genes) = 451
##
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 4 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF00118 Cpn60_TCP1 0 0.024 0.002 10 GQ602_000364, GQ…
## 2 PF00076 RRM_1 0 0.058 0.014 60 GQ602_000255, GQ…
## 3 PF00004 AAA 0.00033 0.034 0.01 45 GQ602_000605, GQ…
## 4 PF00400 WD40 0.0168 0.041 0.021 91 GQ602_000228, GQ…
##
## n(overrepresented terms) = 4
## Species: beau
## 24h-rhythmic genes, Cluster: 4
## n(genes) = 324
##
## [1] "Loading annotation file for Beauveria bassiana"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 4 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF00118 Cpn60_TCP1 0 0.024 0.002 10 GQ602_000364, GQ…
## 2 PF00076 RRM_1 0 0.058 0.014 60 GQ602_000255, GQ…
## 3 PF00004 AAA 0.00033 0.034 0.01 45 GQ602_000605, GQ…
## 4 PF00400 WD40 0.0168 0.041 0.021 91 GQ602_000228, GQ…
##
## n(overrepresented terms) = 4
## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 1
## n(genes) = 718
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 4 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF00118 Cpn60_TCP1 0 0.024 0.002 10 GQ602_000364, GQ…
## 2 PF00076 RRM_1 0 0.058 0.014 60 GQ602_000255, GQ…
## 3 PF00004 AAA 0.00033 0.034 0.01 45 GQ602_000605, GQ…
## 4 PF00400 WD40 0.0168 0.041 0.021 91 GQ602_000228, GQ…
##
## n(overrepresented terms) = 4
## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 2
## n(genes) = 1026
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 4 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF00118 Cpn60_TCP1 0 0.024 0.002 10 GQ602_000364, GQ…
## 2 PF00076 RRM_1 0 0.058 0.014 60 GQ602_000255, GQ…
## 3 PF00004 AAA 0.00033 0.034 0.01 45 GQ602_000605, GQ…
## 4 PF00400 WD40 0.0168 0.041 0.021 91 GQ602_000228, GQ…
##
## n(overrepresented terms) = 4
## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 3
## n(genes) = 451
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 4 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF00118 Cpn60_TCP1 0 0.024 0.002 10 GQ602_000364, GQ…
## 2 PF00076 RRM_1 0 0.058 0.014 60 GQ602_000255, GQ…
## 3 PF00004 AAA 0.00033 0.034 0.01 45 GQ602_000605, GQ…
## 4 PF00400 WD40 0.0168 0.041 0.021 91 GQ602_000228, GQ…
##
## n(overrepresented terms) = 4
## Species: ophio_cflo
## 24h-rhythmic genes, Cluster: 4
## n(genes) = 324
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
## # A tibble: 4 x 7
## annot_term annot_desc adj_pVal sam_freq back_freq n_annot_bg gene_name
## <chr> <chr> <dbl> <dbl> <dbl> <int> <chr>
## 1 PF00118 Cpn60_TCP1 0 0.024 0.002 10 GQ602_000364, GQ…
## 2 PF00076 RRM_1 0 0.058 0.014 60 GQ602_000255, GQ…
## 3 PF00004 AAA 0.00033 0.034 0.01 45 GQ602_000605, GQ…
## 4 PF00400 WD40 0.0168 0.041 0.021 91 GQ602_000228, GQ…
##
## n(overrepresented terms) = 4
## Visualize the overlap
cluster.dat <- list()
for (i in 1:n_clusters) {
cluster.dat[[i]] <- my_gene_col %>%
rownames_to_column("g") %>% filter(cluster==as.character(i)) %>% pull(g)
}
names(cluster.dat) <- paste0("Cluster_",1:4)
for (j in 1:2) {
rhy.dat <- rhy[[j]]
cluster.dat.dummy <- cluster.dat
if (j == 1) {
for (i in 1:n_clusters) {
cluster.dat.dummy[[i]] <-
homology.dat %>%
filter(ophio_gene %in% cluster.dat.dummy[[i]]) %>%
pull(beau_gene)
}
}
listInput <- list(rhy.dat,
cluster.dat.dummy[[1]], cluster.dat.dummy[[2]],
cluster.dat.dummy[[3]], cluster.dat.dummy[[4]])
names(listInput) <- c(paste0(sample.name[[j]], c("_rhy24")), paste0("cluster_",1:4))
library(UpSetR)
library(viridis)
# caste.col <- c("#F23030","#1A80D9")
upset(fromList(listInput),
number.angles = 0, point.size = 3, line.size = 1.5,
mainbar.y.label = "Number of overlapping genes",
sets.x.label = "Sig. rhy genes",
text.scale = c(1.5, # y-axis label ("# overlapping genes")
2, # y-axis tick labels ("1000, 2000,..")
1.5, # label for histogram ("sig. rhy genes")
1, # tick labels for histogram
1.5, # set names ("Cflo-brain_08h,..")
1.5),
sets = names(listInput),
nintersects = 15,
keep.order = T,
sets.bar.color = viridis(1),
# adding queries
query.legend = "bottom"
) %>%
print()
}
It seems that majority of the genes in Cluster 3 and 4 are sig. rhythmic in Ophio but not in Beau. We will perform the pairwise Fisher’s exact test to find out. Let’s dig in!
NOTE: We need to think about the best way to perform the Fisher’s exact test. For starters, I am transforming all the gene names to Ophio
# LIST ONE - Cluster identity
list1 <- cluster.dat
names(list1) <- names(cluster.dat)
## LIST TWO - ophio rhythmic genes
beau.ortho.rhy <- homology.dat %>% filter(beau_gene %in% rhy[[1]]) %>% pull(ophio_gene) %>% unique()
ocflo.ortho.rhy <- homology.dat %>% filter(ophio_gene %in% rhy[[2]]) %>% pull(ophio_gene) %>% unique()
list2 <- list(beau.ortho.rhy, ocflo.ortho.rhy)
names(list2) <- paste0(sample.name[1:2], c("_24h"))
## CHECK FOR OVERLAP
library(GeneOverlap)
# define the background geneset
# in our case, it would be the number of orthologous genes between beau and Ophio_cflo
nGenes = homology.dat %>% pull(ophio_gene) %>% unique() %>% length()
## make a GOM object
gom.1v2 <- newGOM(list1, list2, genome.size = nGenes)
png(paste0(path_to_repo, "/results/figures/BD/ocflo_beau_orthologs_rhy_overlap.png"),
width = 15, height = 15, units = "cm", res = 300)
drawHeatmap(gom.1v2,
adj.p=T,
cutoff=0.01,
what="odds.ratio",
# what="Jaccard",
log.scale = T,
note.col = "grey60")
trash <- dev.off()
Orthologous rhy24 genes
As we predicted, Cluster 3 and 4 genes show a stronger overlap with 24h-rhythmic genes in O. cflo as comapred to Beauveria (as can be seen from both log2-odds ratio and the associated q-value). The signal is strongest for Cluster 4, so let’s see which genes are in this cluster.
## Get the annotation data
ocflo.annots <- read.csv(paste0(path_to_repo, "/data/ophio_cflo_TC6_data.csv"), stringsAsFactors = F) %>% as.tibble()
ocflo.annots %<>%
filter(expressed=="yes") %>%
select(gene_name = gene_ID_ncbi, gene_ID_robin, blast_annot, GammaP_24h, GOs:ophio_kim_homolog)
## Subset the gene_names
ocflo.annots %>%
filter(gene_name %in% cluster.dat[[4]]) %>%
# plot the q-values
ggplot() +
geom_density(aes(x=GammaP_24h)) +
labs(x=paste0(sample.name[[2]]," rhythmicity, 24h (q-value)")) +
scale_x_continuous(breaks = c(0,0.05, 0.1, 0.5, 1)) +
theme_Publication()
# which genes?
# filter(!is.na(GOs)) %>%
# view()
## Check these genes for other annotations (signalP, SSP, TMHMM)
# LIST ONE - Cluster identity
list1 <- cluster.dat
names(list1) <- names(cluster.dat)
## LIST TWO - ophio rhythmic genes
signalP <- ocflo.annots %>% filter(signalP == "yes") %>% pull(gene_name)
SSP <- ocflo.annots %>% filter(SSP == "yes") %>% pull(gene_name)
TMHMM <- ocflo.annots %>% filter(TMHMM == "yes") %>% pull(gene_name)
list2 <- list(signalP, SSP, TMHMM)
names(list2) <- paste0(sample.name[[2]], "-", c("signalP", "SSP", "TMHMM"))
## CHECK FOR OVERLAP
library(GeneOverlap)
# define the background geneset
# in our case, it would be the number of orthologous genes between beau and Ophio_cflo
nGenes = ocflo.annots %>% nrow()
## make a GOM object
gom <- newGOM(list1, list2, genome.size = nGenes)
png(paste0(path_to_repo, "/results/figures/BD/ocflo_beau_orthologs_annots_overlap.png"),
width = 15, height = 15, units = "cm", res = 300)
drawHeatmap(gom,
adj.p=T,
cutoff=0.01,
what="odds.ratio",
# what="Jaccard",
log.scale = T,
note.col = "grey60")
trash <- dev.off()
Overlap of orthologous rhy24 gene clusters with additional annotations
Prepare the functions, libraries required
# Let's load functions for running limorhyde
source(system.file('extdata', 'vignette_functions.R', package = 'limorhyde'))
# Let's load the libraries required for running Limorhyde
# library('annotate')
library('data.table')
library('foreach')
# library('GEOquery')
library('ggplot2')
library('knitr')
library('limma')
library('limorhyde')
conflict_prefer("union", "dplyr")
Create dataframe with metadata information for the different samples collected
sampleName <- c("ophio_cflo","ophio_ophio-infected")
short.name <- c("AC","AI") # AC = arb2-control, AI = arb2-infection
time.points <- c(2,4,6,8,10,12,14,16,18,20,22,24)
light.dark <- c(rep("light",times=5), rep("dark",times=6), rep("light", times=1))
meta <- data.frame(title = paste0(rep(sampleName, each=12),"_ZT",time.points),
sample = paste0(rep(time.points, times=2),rep(short.name, each=12)),
genotype = rep(sampleName, each=12),
time = rep(time.points, times=2),
cond = rep(sampleName, each=12),
LD = rep(light.dark, times=2),
stringsAsFactors = F)
meta %>% glimpse()
## Observations: 24
## Variables: 6
## $ title <chr> "ophio_cflo_ZT2", "ophio_cflo_ZT4", "ophio_cflo_ZT6", "ophio…
## $ sample <chr> "2AC", "4AC", "6AC", "8AC", "10AC", "12AC", "14AC", "16AC", …
## $ genotype <chr> "ophio_cflo", "ophio_cflo", "ophio_cflo", "ophio_cflo", "oph…
## $ time <dbl> 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 2, 4, 6, 8, 10, …
## $ cond <chr> "ophio_cflo", "ophio_cflo", "ophio_cflo", "ophio_cflo", "oph…
## $ LD <chr> "light", "light", "light", "light", "light", "dark", "dark",…
Now, format the metadata.
### 1.1.1 Format the meta-data ----------------
# load the meta-data
sm <- meta
# Let's format the columns in the right data-type
sm$time <- as.numeric(sm$time)
# sm$batch <- as.factor(sm$batch)
sm$LD <- as.factor(sm$LD)
# sm$location <- as.factor(sm$location)
# Let's get a glimpse of the metadata
sm %>% as_tibble() %>% head()
## # A tibble: 6 x 6
## title sample genotype time cond LD
## <chr> <chr> <chr> <dbl> <chr> <fct>
## 1 ophio_cflo_ZT2 2AC ophio_cflo 2 ophio_cflo light
## 2 ophio_cflo_ZT4 4AC ophio_cflo 4 ophio_cflo light
## 3 ophio_cflo_ZT6 6AC ophio_cflo 6 ophio_cflo light
## 4 ophio_cflo_ZT8 8AC ophio_cflo 8 ophio_cflo light
## 5 ophio_cflo_ZT10 10AC ophio_cflo 10 ophio_cflo light
## 6 ophio_cflo_ZT12 12AC ophio_cflo 12 ophio_cflo dark
# Next we use limorhyde to calculate time_cos and time_sin, which are based on the first
#harmonic of a Fourier decomposition of the time column, and append them to the sm data frame.
sm = cbind(sm, limorhyde(sm$time, 'time_'))
# convert the dataframe into a data.table
sm <- data.table(sm)
# check that it worked
sm[1:5, ]
## title sample genotype time cond LD time_cos
## 1: ophio_cflo_ZT2 2AC ophio_cflo 2 ophio_cflo light 8.660254e-01
## 2: ophio_cflo_ZT4 4AC ophio_cflo 4 ophio_cflo light 5.000000e-01
## 3: ophio_cflo_ZT6 6AC ophio_cflo 6 ophio_cflo light 6.123234e-17
## 4: ophio_cflo_ZT8 8AC ophio_cflo 8 ophio_cflo light -5.000000e-01
## 5: ophio_cflo_ZT10 10AC ophio_cflo 10 ophio_cflo light -8.660254e-01
## time_sin
## 1: 0.5000000
## 2: 0.8660254
## 3: 1.0000000
## 4: 0.8660254
## 5: 0.5000000
## DATASET 1
## Load the control O.cflo data (from TC6)
ocflo.control.dat <-
data.db %>%
tbl(., paste0(sampleName[[1]], "_fpkm")) %>%
select(gene_name, everything()) %>%
collect()
## DATASET 2
## Load the O.cflo infection data from the mixed transcriptomics study (from TC7)
inf.db <- dbConnect(RSQLite::SQLite(),
paste0(path_to_repo,"/../Das_et_al_2022b/data/databases/TC7_data.db"))
# src_dbi(inf.db)
# extract the (gene-expr X time-point) data
ocflo.inf.dat <-
inf.db %>%
tbl(., paste0(sampleName[[2]], "_fpkm")) %>%
select(gene_name, everything()) %>%
collect()
The goal is to use only the genes that show expression (>1 FPKM) for at least half of the timepoints during the 24h day (i.e., 6 of the 12 timepoints).
## DATASET 1
n.exp.1 <- apply(ocflo.control.dat[-1], 1, function(x) sum(x>=1))
ocflo.control.dat <- ocflo.control.dat[which(n.exp.1>=6),]
colnames(ocflo.control.dat)[-1] <- paste0("ZT", meta[meta$cond==sampleName[[1]],] %>% pull(sample))
## DATASET 2
n.exp.2 <- apply(ocflo.inf.dat[-1], 1, function(x) sum(x>=1))
ocflo.inf.dat <- ocflo.inf.dat[which(n.exp.2>=6),]
colnames(ocflo.inf.dat)[-1] <- paste0("ZT", meta[meta$cond==sampleName[[2]],] %>% pull(sample))
## Use the genes that are expressed in both conditions
emat <-
ocflo.control.dat %>%
filter(gene_name %in% ocflo.inf.dat$gene_name) %>%
left_join(ocflo.inf.dat, by="gene_name") %>%
as.data.frame()
Next, let’s perform the DEG analyses for the ophio-cflo (halfway through infection v. controls)
### Convert to a matrix
# save gene names as row names
rownames(emat) <- emat[,1]
emat <- emat[,-1]
# Need to make the emat into a matrix.
emat <- data.matrix(emat)
# log2 transform the data
emat <- log2(emat + 1)
### Set thresholds
# Set threshold for q-value and log2FC
q.threshold <- 0.05 # currently, using 5% FDR (BH adjusted p-value)
log2.foldchange <- 1 # thus, any gene with a 2^(log2.foldchange) fold change in it's expression
### Format the metadata, if necessary
# Filter the metadata according to your comparison
sm.sub <- sm %>% filter(cond %in% c(sampleName))
# Define the cond column as a factor
sm.sub$cond <- as.factor(sm.sub$cond)
### Let's run the DEG analyses
# Use the subsetted emat to find DEGs
design.deg = model.matrix(~ cond + time_cos + time_sin, data = sm.sub)
#
fit = lmFit(emat, design.deg)
fit = eBayes(fit, trend = TRUE)
# Take a look at the coefficients table
# fit$coefficients %>% head()
#
deLimma.deg = data.table(topTable(fit, coef = 2, number = Inf), keep.rownames = TRUE)
setnames(deLimma.deg, 'rn', 'gene_name')
deLimma.deg[, adj.P.Val := p.adjust(P.Value, method = 'BH')]
setorderv(deLimma.deg, 'adj.P.Val')
### Annotate the results
# Annotate the results to indicate the significant genes
all.DEGs <-
deLimma.deg %>%
arrange(desc(abs(logFC)), adj.P.Val) %>%
mutate(sig = as.factor(ifelse(adj.P.Val < q.threshold & abs(logFC) >= log2.foldchange, "yes", "no"))) %>%
mutate(inf_v_control = as.factor(ifelse(sig=="yes", ifelse( logFC > 0, "up", "down" ), "NA"))) %>%
mutate(inf = sampleName[[2]])
### Summarize the results
writeLines(paste0("\nControl-", sampleName[[1]], " v. ", sampleName[[2]], "\n--Results of DEG analysis--"))
##
## Control-ophio_cflo v. ophio_ophio-infected
## --Results of DEG analysis--
## How many DEGs - 5% FDR and ≥ 1 fold change in gene expression
all.DEGs %>%
# filter(adj.P.Val < q.threshold) %>%
# filter(abs(logFC) >= 2) %>% # change the criteria here for top DEG or all DEG (logFC≥1)
filter(sig == "yes") %>%
# pull(gene_name) %>%
group_by(inf_v_control) %>%
summarise(n_genes = n()) %>%
as.data.frame() %>%
## n = 81 up- and 141 down-regulated genes in Cflo heads during Ophio-infection
## (at 5% FDR; log2-fold-change ≥ 1)
print()
## inf_v_control n_genes
## 1 down 395
## 2 up 318
### Subset to keep only sig. DEGs
sig.DEGs <- all.DEGs %>% filter(sig=="yes")
# Volcano plot
library(viridis)
ggplot(all.DEGs) +
# geom_hline(yintercept = -log10(0.05), col="red", alpha=0.6) +
# geom_vline(xintercept = c(-2,2), col="grey60", alpha=0.75) +
geom_point(aes(x = logFC, y = -log10(adj.P.Val), color=sig), size = 1.5, alpha = 0.5) +
labs(x = expression(log[2]*' fold-change (inf_v_control)'),
y = expression(-log[10]*' '*q[DE]),
title = "O.cflo (infection v. control)",
color = "significant") +
# scale_x_continuous(limits = c(-5,3),
# breaks = c(-5,-4,-3,-2,-1,0,1,2,3),
# labels = c("-5","","-3","","-1","","1","","3")) +
# xlim(c(-50,50)) +
theme_Publication() +
scale_color_viridis(discrete = T, direction = -1, option = "viridis")
## Load the ophio DEG (at manipulation) data from Will et al. 2020
will2020_data <- read.csv(paste0(path_to_repo,
"/data/input/ophio_cflo/complete_annotations/FullBlast_EC05_RNAseq_orignal_copy_26Aug19.csv"),
stringsAsFactors = F)
will2020_data %<>%
as_tibble() %>%
filter(sample_1=="Alive" & sample_2=="Fungus") %>%
select(arb2_gene, logFC = log2.fold_change., q_value, significant) %>%
mutate(logFC=as.numeric(logFC), q_value=as.numeric(q_value)) %>%
filter(significant=="yes") %>%
mutate(up_down = ifelse(logFC > 0, "down", "up")) %>%
mutate(logFC = -1*logFC) %>%
na.omit()
### Change ophio gene names to ncbi IDs
will2020_data %<>%
left_join(ocflo.annots[1:3], by=c("arb2_gene"="gene_ID_robin")) %>%
select(-1) %>%
select(gene_name, blast_annot, everything())
### Subset the up/down-regulated genes
### At halfway-through disease progression
inf.up <- sig.DEGs %>% filter(inf_v_control=="up") %>% pull(gene_name)
inf.down <- sig.DEGs %>% filter(inf_v_control=="down") %>% pull(gene_name)
### At active manipulation
manip.up <- will2020_data %>% filter(up_down=="up") %>% pull(gene_name)
manip.down <- will2020_data %>% filter(up_down=="down") %>% pull(gene_name)
### Visualize the results
listInput <- list(inf.up, inf.down, manip.up, manip.down)
names(listInput) <- c(paste0("inf_",c("up","down")), paste0("manip_", c("up","down")))
library(UpSetR)
library(viridis)
upset(fromList(listInput),
number.angles = 0, point.size = 3, line.size = 1.5,
mainbar.y.label = "Number of overlapping genes",
sets.x.label = "Sig. DE genes",
text.scale = c(1.5, # y-axis label ("# overlapping genes")
2, # y-axis tick labels ("1000, 2000,..")
1.5, # label for histogram ("sig. rhy genes")
1, # tick labels for histogram
1.5, # set names ("Cflo-brain_08h,..")
1.5),
sets = names(listInput),
nintersects = 15,
keep.order = T,
sets.bar.color = viridis(1),
# adding queries
query.legend = "bottom"
) %>%
print()
### Test significance of overlap
list1 <- list(inf.up, inf.down)
names(list1) <- paste0("inf_",c("up","down"))
list2 <- list(manip.up, manip.down)
names(list2) <- paste0("manip_", c("up","down"))
bg.genes <- all.DEGs %>% nrow()
overlap <- check_overlap(list1, list2, bg.genes)
Let’s plot the daily expression of the DEGs (ocflo controls v. during infection)
# inf.down %>%
# # stacked.zplot_tc6(cond = "inf")
# stacked.zplot_tc6(cond = "ophio")
## Get the ocflo infection timecourse data (zscores)
zscore.inf.dat <- inf.db %>% tbl(., paste0(sampleName[[2]], "_zscores")) %>% collect()
colnames(zscore.inf.dat)[-1] <- paste0("ZT",meta[meta$cond==sampleName[[2]],] %>% pull(sample))
## Specify parameters
n_clusters <- 4
which.degs <- list(inf.up, inf.down, manip.up, manip.down)
names(which.degs) <- c("inf.up", "inf.down", "manip.up", "manip.down")
for (i in 1:length(which.degs)) {
## Which genes to look at?
# which.genes <- c(inf.up,inf.down)
which.genes <- which.degs[[i]]
### Make the dataframe for plotting
zscore.deg.dat <-
zscore.dat[[2]] %>%
filter(gene_name %in% which.genes) %>%
# add data from infection
left_join(zscore.inf.dat, by="gene_name") %>%
# drop any genes without expression values (NA)
na.omit() %>%
as.data.frame() %>%
# set genes as rownames
column_to_rownames("gene_name")
# Set genes as rownames and convert it into a matrix
# rownames(zscore.rhy.homology.dat) = zscore.rhy.homology.dat$gene_name
zscore.deg.dat <- as.matrix(zscore.deg.dat)
# Hierarchical clustering of the genesets
my_hclust_gene <- hclust(dist(zscore.deg.dat), method = "complete")
# Make annotations for the heatmaps
my_clusters <- cutree(tree = as.dendrogram(my_hclust_gene), k = n_clusters) # k= number of clusters
my_gene_col <- data.frame(cluster = my_clusters)
# I’ll add some column annotations and create the heatmap.
# Annotations for:
# 1. Is the sample collected during the light or dark phase?
my_sample_col <- data.frame(phase = rep(rep(c("light", "dark", "light"),c(5,6,1)),2),
conds = rep(c("ocflo_controls", "ocflo_infection"), each=12))
row.names(my_sample_col) <- colnames(zscore.deg.dat)
# Manual color palette
my_colour = list(
phase = c(light = "#F2E18D", dark = "#010440"),
conds = c(ocflo_controls = col.scheme[[2]], ocflo_infection = col.scheme[[4]]),
cluster = viridis::cividis(100)[c(seq(10,80,by=round(80/(n_clusters), 0)))])
# Color scale
my.breaks = seq(min(zscore.deg.dat), max(zscore.deg.dat), by=0.1)
# my.breaks = seq(min(zscore.rhy), max(zscore.rhy), by=0.06)
# Let's plot!
pheatmap(zscore.deg.dat, show_rownames = F, show_colnames = F,
annotation_row = my_gene_col,
annotation_col = my_sample_col,
cutree_rows = n_clusters, # OG was 4
# cutree_cols = 2,
gaps_col = c(12,24),
annotation_colors = my_colour,
border_color=FALSE,
cluster_cols = F,
breaks = my.breaks,
## color scheme borrowed from:
color = inferno(length(my.breaks) - 1),
treeheight_row = 0,
# treeheight_col = 0,
# remove the color scale or not
main = paste0("DEGs - ",names(which.degs)[[i]], "\n (n=",nrow(zscore.deg.dat), " genes)"),
## annotation legend
annotation_legend = T,
## Color scale
legend = T) %>%
print()
for (j in 1:n_clusters){
writeLines(paste0("Which DEGs: ", names(which.degs)[[i]], "\n", "Cluster: ", j))
# Summary
genes <- my_gene_col %>% rownames_to_column("g") %>% filter(cluster==as.character(j)) %>% pull(g)
writeLines(paste0("n(genes) = ", length(genes),"\n"))
# define the background geneset for enrichment analysis
bg.genes <- all.DEGs %>% pull(gene_name)
# Enrichment
overrepresented.terms <-
genes %>%
check_enrichment(.,
what = "pfams",
org = sampleName[[1]],
bg = bg.genes,
filter = T,
expand = T,
plot = T)
writeLines(paste0("\n", "n(overrepresented terms) = ", nrow(overrepresented.terms), "\n"))
overrepresented.terms %>% print()
# Stacked zplot
stacked.plot1 <- genes %>% stacked.zplot_tc6(cond = sampleName[[1]]) %>% pluck(1)
stacked.plot2 <- genes %>% stacked.zplot_tc6(cond = sampleName[[2]]) %>% pluck(1)
ggpubr::ggarrange(plotlist=list(stacked.plot1, stacked.plot2),
nrow = 1, ncol = 2,
widths = c(1,1), labels = NA) %>%
print()
}
}
## Which DEGs: inf.up
## Cluster: 1
## n(genes) = 230
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 27
##
## # A tibble: 27 x 3
## # Groups: gene_name [27]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 GQ602_000632 sugar transporter STL1 MFS_1
## 2 GQ602_000695 fungal specific transcription factor domain-c… Fungal_trans
## 3 GQ602_000758 Tubulin gamma chain MFS_1
## 4 GQ602_000992 cytochrome P450 p450
## 5 GQ602_001053 Transcription factor, fungi Fungal_trans; Zn…
## 6 GQ602_001338 related to hexose transporter protein MFS_1
## 7 GQ602_001657 MFS transporter MFS_1
## 8 GQ602_001664 General substrate transporter MFS_1
## 9 GQ602_001839 membrane protein MFS_1
## 10 GQ602_002731 related to acetate regulatory DNA binding pro… Zn_clus
## # … with 17 more rows
## Which DEGs: inf.up
## Cluster: 2
## n(genes) = 44
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
##
## n(overrepresented terms) =
##
## [1] "There are no pfams terms to test enrichment for."
## Which DEGs: inf.up
## Cluster: 3
## n(genes) = 34
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
##
## n(overrepresented terms) =
##
## [1] "There are no pfams terms to test enrichment for."
## Which DEGs: inf.up
## Cluster: 4
## n(genes) = 10
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
##
## n(overrepresented terms) =
##
## [1] "There are no pfams terms to test enrichment for."
## Which DEGs: inf.down
## Cluster: 1
## n(genes) = 79
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
##
## n(overrepresented terms) =
##
## [1] "There are no pfams terms to test enrichment for."
## Which DEGs: inf.down
## Cluster: 2
## n(genes) = 169
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 85
##
## # A tibble: 85 x 3
## # Groups: gene_name [85]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 GQ602_000251 Kin of IRRE-like protein 3 no_desc
## 2 GQ602_000386 hypothetical protein CDD80_7580 no_desc
## 3 GQ602_000718 ---NA--- no_desc
## 4 GQ602_000794 hypothetical protein CDD80_1909 no_desc
## 5 GQ602_000881 annexin ANXC4 no_desc
## 6 GQ602_000916 L-fucose permease no_desc
## 7 GQ602_001071 hypothetical protein CDD80_4002 no_desc
## 8 GQ602_001147 MFS transporter MFS_1
## 9 GQ602_001511 transcription factor bZIP no_desc
## 10 GQ602_001539 double strand RNA binding from dead end protein 1 do… no_desc
## # … with 75 more rows
## Which DEGs: inf.down
## Cluster: 3
## n(genes) = 103
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
##
## n(overrepresented terms) =
##
## [1] "There are no pfams terms to test enrichment for."
## Which DEGs: inf.down
## Cluster: 4
## n(genes) = 44
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
##
## n(overrepresented terms) =
##
## [1] "There are no pfams terms to test enrichment for."
## Which DEGs: manip.up
## Cluster: 1
## n(genes) = 275
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 8
##
## # A tibble: 8 x 3
## # Groups: gene_name [8]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 GQ602_000359 QDE3 protein ResIII; Helicase_C
## 2 GQ602_000777 putative ATP-dependent helicase C23E6.02 SNF2_N; ResIII; Helica…
## 3 GQ602_000950 chromatin remodelling complex ATPase cha… SNF2_N; ResIII; Helica…
## 4 GQ602_001702 DNA repair protein RAD5 SNF2_N; ResIII; Helica…
## 5 GQ602_003520 DNA repair protein RAD16 SNF2_N; ResIII; Helica…
## 6 GQ602_004971 ATP-dependent RNA helicase DBP8 Helicase_C
## 7 GQ602_005416 Helicase SWR1 SNF2_N; ResIII; Helica…
## 8 GQ602_006318 ISWI chromatin-remodeling complex ATPase… SNF2_N; ResIII; Helica…
## Which DEGs: manip.up
## Cluster: 2
## n(genes) = 432
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 31
##
## # A tibble: 31 x 3
## # Groups: gene_name [31]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 GQ602_000074 4-coumarate-CoA ligase 2 AMP-binding
## 2 GQ602_000222 2,4-dienoyl-CoA reductase (NADPH2) adh_short; adh_short_C2; KR
## 3 GQ602_000533 Amine oxidase NAD_binding_8
## 4 GQ602_000535 RING finger domain protein zf-RING_2
## 5 GQ602_000545 Amine oxidase NAD_binding_8
## 6 GQ602_000698 thiamine biosynthetic enzyme NAD_binding_8
## 7 GQ602_001035 2-succinylbenzoate-CoA ligase AMP-binding
## 8 GQ602_001440 short-chain dehydrogenase adh_short; adh_short_C2; KR
## 9 GQ602_001568 CHY zinc finger containing protein zf-RING_2
## 10 GQ602_001579 AMP-binding enzyme AMP-binding
## # … with 21 more rows
## Which DEGs: manip.up
## Cluster: 3
## n(genes) = 788
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 319
##
## # A tibble: 319 x 3
## # Groups: gene_name [319]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 GQ602_000018 heat-labile enterotoxin subunit alpha Enterotoxin_a
## 2 GQ602_000037 myb family transcription factor no_desc
## 3 GQ602_000140 predicted protein no_desc
## 4 GQ602_000154 hypothetical protein CDD80_1007 no_desc
## 5 GQ602_000157 hypothetical protein CDD80_7157 no_desc
## 6 GQ602_000176 Galactose oxidase, beta-propeller WSC
## 7 GQ602_000180 pyruvate carboxylase no_desc
## 8 GQ602_000188 hypothetical protein CDD80_7365 no_desc
## 9 GQ602_000189 F-box domain protein no_desc
## 10 GQ602_000190 hypothetical protein CDD80_3433 no_desc
## # … with 309 more rows
## Which DEGs: manip.up
## Cluster: 4
## n(genes) = 368
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 15
##
## # A tibble: 15 x 3
## # Groups: gene_name [15]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 GQ602_000593 ATP-dependent RNA helicase DED1 Helicase_C
## 2 GQ602_001793 DNA repair protein rhp54 SNF2_N; ResIII; Hel…
## 3 GQ602_001961 DNA helicase PIF1, ATP-dependent AAA
## 4 GQ602_004123 SNF2 family helicase SNF2_N; Helicase_C
## 5 GQ602_004402 SNF2-related protein SNF2_N; Helicase_C
## 6 GQ602_004921 SNF2-family ATP dependent chromatin remode… SNF2_N; ResIII; Hel…
## 7 GQ602_005058 replication factor C subunit 4 AAA
## 8 GQ602_005390 SNF2-related protein SNF2_N; ResIII; Hel…
## 9 GQ602_005470 SNF2-related protein SNF2_N; ResIII; Hel…
## 10 GQ602_005747 replication factor C subunit 3/5 AAA
## 11 GQ602_006164 ATP-dependent RNA helicase SUB2 ResIII; Helicase_C
## 12 GQ602_006650 DNA replication factor C, large subunit AAA
## 13 GQ602_006652 Replication factor C subunit 5 AAA
## 14 GQ602_006690 reptin AAA
## 15 GQ602_007264 cell division control protein Cdc6 AAA
## Which DEGs: manip.down
## Cluster: 1
## n(genes) = 244
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 9
##
## # A tibble: 9 x 3
## # Groups: gene_name [9]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 GQ602_000370 proteasome subunit alpha 3 Proteasome; Proteasome_A_N
## 2 GQ602_000752 proteasome subunit alpha type 6 Proteasome; Proteasome_A_N
## 3 GQ602_001330 proteasome subunit alpha type-4 Proteasome; Proteasome_A_N
## 4 GQ602_004171 proteasome component PUP2 Proteasome; Proteasome_A_N
## 5 GQ602_004412 Proteasome subunit beta type-3 Proteasome
## 6 GQ602_004910 proteasome component pre6 Proteasome; Proteasome_A_N
## 7 GQ602_005128 proteasome subunit alpha type 2 Proteasome; Proteasome_A_N
## 8 GQ602_006384 proteasome component PRE3 precursor Proteasome
## 9 GQ602_007382 proteasome component C5 Proteasome
## Which DEGs: manip.down
## Cluster: 2
## n(genes) = 420
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 13
##
## # A tibble: 13 x 3
## # Groups: gene_name [13]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 GQ602_000905 glucose 1-dehydrogenase adh_short_C2;…
## 2 GQ602_001644 monosaccharide transporter Sugar_tr
## 3 GQ602_002426 short-chain dehydrogenase/reductase family prote… adh_short_C2;…
## 4 GQ602_002722 Major facilitator superfamily transporter Sugar_tr
## 5 GQ602_003015 related to short-chain alcohol dehydrogenase adh_short_C2;…
## 6 GQ602_003481 D-arabinitol dehydrogenase ArbD adh_short_C2;…
## 7 GQ602_003561 Major facilitator superfamily domain, general su… Sugar_tr
## 8 GQ602_005344 galactose-proton symporter Sugar_tr
## 9 GQ602_005546 sugar transporter family protein Sugar_tr
## 10 GQ602_006178 putative short-chain dehydrogenase reductase sdr… adh_short_C2;…
## 11 GQ602_006336 3-ketoacyl-CoA reductase adh_short_C2;…
## 12 GQ602_006422 Catalase-peroxidase Sugar_tr
## 13 GQ602_006607 sugar porter (SP) family MFS transporter Sugar_tr
## Which DEGs: manip.down
## Cluster: 3
## n(genes) = 371
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 5
##
## # A tibble: 5 x 3
## # Groups: gene_name [5]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 GQ602_003056 probable kinesin-related protein KLPA Kinesin; Microtub_bd
## 2 GQ602_003184 kinesin family protein Kinesin; Microtub_bd
## 3 GQ602_004544 Kinesin-like protein Kinesin; Microtub_bd
## 4 GQ602_004862 kinesin family member 1/13/14 Kinesin; Microtub_bd
## 5 GQ602_005891 kinesin family protein Kinesin; Microtub_bd
## Which DEGs: manip.down
## Cluster: 4
## n(genes) = 588
##
## [1] "Loading annotation file for Ophiocordyceps camponoti-floridani"
## [1] "Done."
## [1] "Testing for enrichment..."
##
## n(overrepresented terms) = 44
##
## # A tibble: 44 x 3
## # Groups: gene_name [44]
## gene_name gene_desc annot_desc
## <chr> <chr> <chr>
## 1 GQ602_000016 hexose transporter-like protein MFS_1; Sugar…
## 2 GQ602_000379 fungal specific transcription factor domain-conta… Zn_clus
## 3 GQ602_000447 multidrug resistance-type transporter protein ABC_tran
## 4 GQ602_000498 C6 zinc finger domain containing protein Zn_clus
## 5 GQ602_000785 aminotriazole resistance protein MFS_1
## 6 GQ602_000837 related to ATP-binding cassette protein ABC_tran
## 7 GQ602_001122 fungal specific transcription factor domain-conta… Zn_clus
## 8 GQ602_001147 MFS transporter MFS_1
## 9 GQ602_001343 DNA excision repair protein ERCC-6 SNF2_N
## 10 GQ602_001478 ATPase components of ABC transporter ABC_tran
## # … with 34 more rows